*_____________________________________________________________________________________________________________________________________________________
*
**# FIRST THINGS
*_____________________________________________________________________________________________________________________________________________________

// Load the harmonised SHARE file as the master dataset for everything below.
// -use- has no replace option (that belongs to -save-); clear is the option that
// discards whatever is currently in memory.
clear
use "${sharedir}\H_SHARE_D2.dta", clear
/* Other things to note if just using SHARE data:
	drop if mergeid=="no int w.2"			// This drops non-responding eligibles for whom MERGEID isn't defined - see notes on MERGEID in OneNote
	sharetom4 _all		// Missing value codes, using the .ado file supplied by the SHARE team
*/

*_____________________________________________________________________________________________________________________________________________________
*
**# MERGING IN WAVE-SPECIFIC DATA
*_____________________________________________________________________________________________________________________________________________________

/* // Checking what's in each wave
foreach wave in 1 2 4 5 6 {
	local healthfiles 	"ph gs cs pf ws cf mh" 	// In turn: Physical health, Grip strength, Chair Stand, Peak flow, Walking speed, Cognitive Function, Mental Health
	local otherfiles 	"dn ep hh gv_weights gv_isced technical_variables gv_health" 	// Demographics, Employment&Pensions, Housing income, Weights, Education (ISCED)
	local unusedfiles	"ac as br ch co cv_h cv_r dropoff ex ft hc ho imputations iv sp vignettes xt"
	foreach file in `healthfiles' `otherfiles' {
		local var ph041* ph043* ph044* ph045* ph046* 
		capture des `var' using "${sharedir}\SHARE w`wave'\sharew`wave'_${shareversno}_`file'.dta"
		if _rc==0	{
			dis _newline(5) "Wave is `wave', file is `file'"
			des `var' using "${sharedir}\SHARE w`wave'\sharew`wave'_${shareversno}_`file'.dta"
		/**/ }
	/**/ }
/**/ }
*/

// Merging things in
// For each wave the raw module variables are merged under their original names and then
// renamed with an r<wave> prefix, so that the next wave's same-named variables can follow.
sort mergeid
foreach wave in 1 2 4 5 6 {
	// Common path stem of every module file for this wave
	local wfile "${sharedir}\SHARE w`wave'\sharew`wave'_${shareversno}"

	// General variables
	merge 1:1 mergeid using "`wfile'_cv_r.dta", ///
		keepusing(int_year int_month) gen(_merge`wave'cv)
	merge 1:1 mergeid using "`wfile'_gv_health.dta", ///
		keepusing(maxgrip) gen(_merge`wave'gvhlth)
	merge 1:1 mergeid using "`wfile'_gv_isced.dta", ///
		keepusing(isced1997_r) gen(_merge`wave'isced)

	// Misc health & employment variables
	if `wave' == 1 {
		merge 1:1 mergeid using "`wfile'_ph.dta", ///
			keepusing(ph054_ ph003_ ph004_ ph005_ ph041_ ph043_ ph044_ ph045_ ph046_) ///
			gen(_merge`wave'ph)
		merge 1:1 mergeid using "`wfile'_ep.dta", ///
			keepusing(ep002_ ep005_) gen(_merge`wave'ep)
		merge 1:1 mergeid using "`wfile'_ep.dta", ///
			keepusing(ep210_ ep071d1 ep071d2 ep071d3 ep071d4 ep071d5 ep071d6 ep071d7 ep071d8 ep071d9 ep071d10 ep071d11 ///
					  ep208_1 ep208_2 ep208_3 ep208_4 ep208_5 ep208_6 ep208_7 ep208_8 ep208_9 ep208_10 ep208_11) ///
			gen(_merge`wave'inc)
		merge 1:1 mergeid using "`wfile'_gs.dta", ///
			keepusing(gs002_) gen(_merge`wave'gs)
	}
	else {
		merge 1:1 mergeid using "`wfile'_ph.dta", ///
			keepusing(ph061_ ph054_ ph003_ ph004_ ph005_ ph041_ ph043_ ph044_ ph045_ ph046_) ///
			gen(_merge`wave'ph)
		merge 1:1 mergeid using "`wfile'_ep.dta", ///
			keepusing(ep210_ ep002_ ep005_ ep125_ ep127* ep128* ep129* ep130*) ///
			gen(_merge`wave'ep)
		merge 1:1 mergeid using "`wfile'_technical_variables.dta", ///
			keepusing(mn024_ mn101_) gen(_merge`wave'tech)
		merge 1:1 mergeid using "`wfile'_gs.dta", ///
			keepusing(gs002_ gs010d1 gs010d2 gs010d3 gs010d4 gs010d5 gs010d6 gs010dot gs012_) ///
			gen(_merge`wave'gs)
		// The ep110/ep113/ep114 benefit-receipt items are only merged before wave 6
		if `wave' < 6 {
			merge 1:1 mergeid using "`wfile'_ep.dta", ///
				keepusing(ep110* ep113* ep114*) gen(_merge`wave'ben)
		}
		rename mn101_	r`wave'mn101_
		rename mn024_	r`wave'mn024_
	}

	// Other health variables: ph010 symptom list in waves 1/2/4, ph084/ph087 in waves 5/6
	if `wave' <= 4 {
		merge 1:1 mergeid using "`wfile'_ph.dta", ///
			keepusing(ph010d1 ph010d2 ph010d3 ph010d4 ph010d5 ph010d6 ph010d7 ph010d8 ph010d9 ph010d10 ph010d11) ///
			gen(_merge`wave'ph010)
		rename ph010d*	r`wave'ph010d*
	}
	else {
		merge 1:1 mergeid using "`wfile'_ph.dta", ///
			keepusing(ph084_ ph087d1 ph087d2 ph087d3 ph087d4 ph087d5 ph087d6 ph087d7) ///
			gen(_merge`wave'ph087)
		rename ph08*	r`wave'ph08*
	}

	// Disability benefits with imputations
	if inlist(`wave', 1, 6) {
		merge 1:1 mergeid using "`wfile'_gv_imputationsv2.dta", ///
			keepusing(ypen3 ypen3_f ypen6 ypen6_f isced isced_f) gen(_merge`wave'impv6)
		// Wave 6 takes its income-source items from ep671 rather than ep071
		if `wave' == 6 {
			merge 1:1 mergeid using "`wfile'_ep.dta", ///
				keepusing(ep671d1 ep671d2 ep671d3 ep671d4 ep671d5 ep671d6 ep671d7 ep671d8 ep671d9 ep671d10 ep671d11 ep671d12 ep671d13 ///
						  ep208_1 ep208_2 ep208_3 ep208_4 ep208_5 ep208_6 ep208_7 ep208_8 ep208_9 ep208_10 ep208_11 ep208_12 ep208_13) ///
				gen(_merge`wave'inc)
		}
	}
	else {
		merge 1:1 mergeid using "`wfile'_ep.dta", ///
			keepusing(ep071d1 ep071d2 ep071d3 ep071d4 ep071d5 ep071d6 ep071d7 ep071d8 ep071d9 ep071d10 ///
					  ep208_1 ep208_2 ep208_3 ep208_4 ep208_5 ep208_6 ep208_7 ep208_8 ep208_9 ep208_10) ///
			gen(_merge`wave'inc)
		merge 1:1 mergeid using "`wfile'_gv_imputationsv2.dta", ///
			keepusing(ypen36 ylsp36 ypen36_f ylsp36_f isced isced_f) gen(_merge`wave'impv6)
	}

	// Renaming so that the wave is clear, to match RAND file
	rename isced*	r`wave'isced*
	rename int_*	r`wave'int_*
	rename maxgrip	r`wave'maxgrip
	rename ph0??_	r`wave'ph0??_
	rename ep*		r`wave'ep*
	rename gs*		r`wave'gs*
	capture rename y*	r`wave'y*
}
drop _merge*

* ph054_ is labelled PROXY, but there's already a label called this in H_SHARE,
* so a separately named value label is defined and attached instead
label define proxySHARE -2 `"-2_Refusal"' -1 `"-1_Don't know"' 1 `"1_Respondent only"' 2 `"2_Respondent and proxy"' 3 `"3_Proxy only"', modify
label values r*ph054_ r*ep210_ proxySHARE


// This needs doing at this point!
// I think these are people in the sample who have never been interviewed
// (missing on all variables except mergeid, about 1/2 the size of the respondent sample)
drop if cid==""

 
*_____________________________________________________________________________________________________________________________________________________
*
**# CLEANING EDUCATION (before reshaping)
*_____________________________________________________________________________________________________________________________________________________

// Builds a cross-wave ISCED education variable from the first wave in which it is observed.
numlabel isced, mask(#_) add

// Start each working variable from scratch (re-creating it if it is already there)
foreach newvar in raiscednew raiscednew2 raiscedimp raiscedimp_f {
	capture drop `newvar'
	gen `newvar' = .
}

foreach w in 1 2 4 5 6 {
	// Version of the raw ISCED code with the special codes turned into extended missings
	capture recode r`w'isced1997_r (-2=.r)(-1=.d)(95=.e)(97=.o), gen(r`w'isced1997_r2)
	replace raiscednew  = r`w'isced1997_r  if missing(raiscednew)
	replace raiscednew2 = r`w'isced1997_r2 if missing(raiscednew2)
	// Imputed version is not taken from wave 6; the flag must be filled before the value,
	// because its condition tests whether the value is still missing
	if `w' != 6 {
		replace raiscedimp_f = r`w'isced_f if missing(raiscedimp) & ~missing(r`w'isced)
		replace raiscedimp   = r`w'isced   if missing(raiscedimp)
	}
	* (a -pause- at wave 5 was previously used here to compare this version with the RAND file - they do not match)
}

label values raiscednew raiscednew2 isced
label var raiscednew "R education by ISCED code, ignoring imputations (BBG corrected version)"
mvdecode raiscednew, mv(-2=.r\-1 97=.d\95=.e)
// Where the first-observed value ended up missing, fall back on the recoded series
replace raiscednew = raiscednew2 if missing(raiscednew) & (~missing(raiscednew2) | raiscednew2==.o)
drop raiscedimp* raedisced raiscednew2



*_____________________________________________________________________________________________________________________________________________________
*
**# CLEANING HEALTH VARS (before reshaping)
*_____________________________________________________________________________________________________________________________________________________

// Height from closest wave (needed for BMI - sometimes missing at later waves, and presumed not to change much within this age group)

* Checking that height really doesn't change much over time
corr r?height if r1agey<67
bysort country: corr r?height if r1agey<67
bysort country: corr r?height if r5agey<67
scatter r1height r5height if country==15 & r5agey<67
graph close
/* On change over time in height:
	In general correlations are over 0.9 between waves
	However, for some countries there seem to be unexplained differences at wave 5 (and a lesser extent, w6).
	The correlations here drop, sometimes to around 0.75.
	Nevertheless, this is still likely to be a pretty good prediction for each person.
*/

* Candidate replacement heights: the nearest later wave (next) and the nearest earlier wave (last).
* All are computed up front from the heights as currently recorded, before any filling takes place.
capture drop r?height????
egen r1heightnext = rowfirst(r2height r4height r5height r6height)
egen r2heightnext = rowfirst(r4height r5height r6height)
egen r4heightnext = rowfirst(r5height r6height)
egen r5heightnext = rowfirst(r6height)
egen r6heightlast = rowlast(r1height r2height r4height r5height)
egen r5heightlast = rowlast(r1height r2height r4height)
egen r4heightlast = rowlast(r1height r2height)
egen r2heightlast = rowlast(r1height)

foreach w in 1 2 4 5 6 {
	// Only fill height where weight is observed but height is not
	local gap "missing(r`w'height) & ~missing(r`w'weight)"
	sum r`w'height???? if ~missing(r`w'weight) & missing(r`w'height)
	// Where both an earlier and a later height exist, prioritise the PREVIOUS height
	// as more likely to be correct (given ageing); the later one is only used if a gap remains
	if `w' > 1 {
		replace r`w'height = r`w'heightlast if `gap' & ~missing(r`w'heightlast)
	}
	if `w' < 6 {
		replace r`w'height = r`w'heightnext if `gap' & ~missing(r`w'heightnext)
	}
}
/**/ }


// Grip strength
mvdecode r?maxgrip, mv(-99 -2 -1=.o)
mvdecode r?gs012_, mv(-2=.r\-1=.d)
foreach wave in 1 2 4 5 6 {
	local p "r`wave'"
	replace `p'maxgrip = .r if `p'gs002_==4

	if `wave' == 1 {
		// Wave 1: the only reason available is gs002_ (which hands can be used)
		recode `p'gs002_ (1 2 3=0 "0_can use at least one hand")(4=1 "1_cant use both hands")(-2=.r)(-1=.d), gen(`p'gs_no_inj)
		label var `p'gs_no_inj "No GS w`wave': can't use both hands"
	}
	else {
		// Later waves: reasons for no test come from the gs010d* indicators
		recode `p'gs010d6 (0=0 "0_no injury-related reason")(1=1 "1_injury or surgery on both hands in past 6mths")(-2=.r)(-1=.d), gen(`p'gs_no_inj)
		replace `p'gs_no_inj = 1 if `p'gs010d4==1
		replace `p'gs_no_inj = 0 if ~missing(`p'maxgrip) & missing(`p'gs_no_inj)
		label var `p'gs_no_inj "No GS w`wave': injury or surgery on both hands in past 6mths or tried but failed to complete test"

		recode `p'gs010d1 (0=0)(1=1)(-2=.r)(-1=.d), gen(`p'gs_no_safe)
		replace `p'gs_no_safe = 1 if `p'gs010d2==1
		replace `p'gs_no_safe = 0 if ~missing(`p'maxgrip) & missing(`p'gs_no_safe)
		label var `p'gs_no_safe "No GS w`wave': I or R felt unsafe"

		rename `p'gs012_ `p'gs_effort
		label var `p'gs_effort "IWER rating of how much effort R gave to test"
	}
}
label define rgs_effort ///
	1 "1_R gave full effort" ///
	2 "2_R was prevented from giving full effort by illness, pain, or other symptoms or discomforts" ///
	3 "3_R did not appear to give full effort, but no obvious reason for this" ///
	9 "9_no answer"
label values r?gs_effort rgs_effort



	
*_____________________________________________________________________________________________________________________________________________________
*
**# CLEANING EMP VARS
*_____________________________________________________________________________________________________________________________________________________

// Setting month/year variables to be a count of months from the start of SHARE w1 (Jan 2004)
mvdecode r?int_year r?int_month, mv(-9=.n)
mvdecode r?ep130* r?ep129*, mv(-2=.r \ -1=.d)
foreach w in 1 2 4 5 6 {
	// Jan 2004 = 0; missing wherever interview year or month is missing
	gen r`w'intct = 12*(r`w'int_year - 2004) + (r`w'int_month - 1)
	label var r`w'intct "Interview month counting from Jan 2004"
}


// Setting month/year of each of the working stints since last interview to count from start of SHARE w1 (Jan 2004)

* Getting consistent years for the different waves (currently they're numbered and labelled, and inconsistently...)
* Waves 2/4 start their year codes at 2004; waves 5/6 start at 2005 (wave 6 has one extra year)
local yrs_w24 `"(1=2004 "2004 or earlier")(2=2005)(3=2006)(4=2007)(5=2008)(6=2009)(7=2010)(8=2011)(9=2012)(10=2013)(11=2014)"'
local yrs_w5  `"(1=2005 "2005 or earlier")(2=2006)(3=2007)(4=2008)(5=2009)(6=2010)(7=2011)(8=2012)(9=2013)(10=2014)"'
local yrs_w6  `"(1=2005 "2005 or earlier")(2=2006)(3=2007)(4=2008)(5=2009)(6=2010)(7=2011)(8=2012)(9=2013)(10=2014)(11=2015)"'
foreach wave in 2 4 {
	forvalues k = 1/13 {
		// capture: not every stint number exists in every wave
		capture recode r`wave'ep130_`k' `yrs_w24', gen(r`wave'ep130Y_`k')
	}
}
forvalues k = 1/14 {
	capture recode r5ep130_`k' `yrs_w5', gen(r5ep130Y_`k')
	capture recode r6ep130_`k' `yrs_w6', gen(r6ep130Y_`k')
}

* Actually setting the value in months from Jan 2004
foreach wave in 2 4 5 6 {
	local p "r`wave'"
	forvalues k = 1/14 {
		gen `p'wrk_`k' = .
		label var `p'wrk_`k' "Month of end of working period `k', counting from Jan 2004"
		// capture: stays missing where the underlying year/month items were not created
		capture replace `p'wrk_`k' = 12*(`p'ep130Y_`k' - 2004) + (`p'ep129_`k' - 1)
		// ep129==13 is 'today', for which ep130 is missing
		capture replace `p'wrk_`k' = `p'intct if `p'ep129_`k'==13
	}
	// Most recent stint end, and months elapsed between it and the interview
	egen `p'wrk_latest = rowmax(`p'wrk*)
	gen  `p'wrk_since  = `p'intct - `p'wrk_latest
	// Year-only fallback where the month-level gap could not be computed
	egen `p'wrk_latestY = rowmax(`p'ep130Y*) if missing(`p'wrk_since)
	gen  `p'wrk_sinceY  = `p'int_year - `p'wrk_latestY if ~missing(`p'wrk_latestY) & missing(`p'wrk_since)
}
/* CHECKING
	browse r1intct r1int_year r1int_month if ~missing(r1int_year)
	browse r2ep005_ r2ep002_ r2ep125_ r2wrk_1 r2ep130_1 r2ep129_1 r2wrk_2 r2ep130_2 r2ep129_2 r2wrk_latest r2intct r2wrk_since if ~missing(r2ep130_1)
*/


// The final employment dv
foreach wave in 2 4 5 6 {
	local p "r`wave'"
	clonevar `p'worknew = `p'work
	label var `p'worknew "BBG edit of R?WORK to look at RECENT work status (w/i 2mths)"
	// Not working if the latest stint ended 2+ months before interview ...
	replace `p'worknew = 0 if `p'wrk_since >= 2 & !missing(`p'wrk_since)
	// ... or, when only years are known, if it ended in an earlier calendar year
	replace `p'worknew = 0 if `p'wrk_sinceY >= 1 & !missing(`p'wrk_sinceY) & missing(`p'wrk_since)
}
// Wave 1 has no stint information here, so it is an unedited copy
clonevar r1worknew = r1work
*bysort r5mn101_: tab r5work r5worknew, cell




*_____________________________________________________________________________________________________________________________________________________
*
**# CLEANING DISBEN VARS (both EP071 based on income, and EP110 report of current claim)
*_____________________________________________________________________________________________________________________________________________________

*-----------------------------------------------
* YPEN3 (EP071) - RAND version using v5 SHARE
*(uses imputations, but any claim last year rather than current)
*-----------------------------------------------
label define benimpute 0 "0_no income in last year" 1 "1_bens received by SR" 2 "2_bens received by imputation", modify
foreach wave in 1 2 4 5 6 {
	// Wave 1 uses r1issdi / r1ifssdi; later waves use r?itssdi / r?itfssdi
	local inc = cond(`wave'==1, "i", "it")
	recode r`wave'`inc'ssdi (0=0)(0.1/max=1), gen(r`wave'beninc3_dis)
	// Flag value 7 on the companion flag variable moves a recipient into category 2 (see benimpute)
	replace r`wave'beninc3_dis = 2 if r`wave'`inc'fssdi==7 & r`wave'beninc3_dis==1
	label var r`wave'beninc3_dis "Dis ben income last year (RAND version)"
}
label values r?beninc3_dis benimpute


// Using this to look at the coding of the underlying EP071 variables as 'disability benefits'
* See also the separate code I used for wave 6 in OneNote
*
* For every wave/country with at least one r?beninc3_dis==1 case, this builds one column holding
* (a) the mean of r?beninc3_dis among people reporting each EP071 category and (b) the N behind it,
* then writes all columns side by side to a csv file.
* Categories whose ep071d* variable does not exist in a wave (e.g. ep071d11, which is only merged
* for wave 1) are recorded as N=0 / mean=. ; previously the failed -summarize- was captured but
* r(N) and r(mean) were still read, so those rows did not describe the category in question.
matrix drop _all
local have_output 0								// becomes 1 once the combined matrix has its first column
foreach wave in 1 2 4 5 {						// NOT wave 6, as doesn't have self-reported disben var
	levelsof country if r`wave'beninc3_dis==1
	local countries "`r(levels)'"
	foreach country in `countries' {
		dis in red "Country is `country', wave is `wave'"
		* Looking at which categories are disability benefits in each country
		* (first row of each column is an identifier of the form country.wave)
		matrix outputmean`country'_`wave' 	= [`country'.`wave']
		matrix outputN`country'_`wave' 		= [`country'.`wave']
		global rownamesmean "_"
		global rownamesN 	"_"
		forvalues cat = 1/11 {
			noisily capture sum r`wave'beninc3_dis if r`wave'ep071d`cat'==1 & country==`country'
			local rc = _rc
			if `rc'==0 {
				local catN = r(N)
				local catmean = .
				if `catN'>0		local catmean = r(mean)
			}
			else {
				// summarize failed (variable not present in this wave): nothing to report
				local catN = 0
				local catmean = .
			}
			matrix outputN`country'_`wave' 		= outputN`country'_`wave' \ `catN'
			matrix outputmean`country'_`wave' 	= outputmean`country'_`wave' \ `catmean'
			global rownamesmean = "${rownamesmean} mean`cat'_`wave'"
			global rownamesN 	= "${rownamesN} N`cat'_`wave'"
		}
		* Combining these into an output matrix
		matrix rownames outputN`country'_`wave' 	= ${rownamesN}
		matrix rownames outputmean`country'_`wave' 	= ${rownamesmean}
		matrix output`country'_`wave' = outputmean`country'_`wave' \ outputN`country'_`wave'
		matrix colnames output`country'_`wave' = "`country'_`wave'"
		* Combining this into a single output matrix
		* (the first column seen starts the matrix, whichever wave/country that turns out to be)
		if `have_output'==0 {
			matrix output = output`country'_`wave'
			local have_output 1
		}
		else {
			matrix output = output, output`country'_`wave'
		}
	}
}
esttab matrix(output) using "${dodir}\Outputs\9_disbencodes", csv replace

			/*-----------------------------------------------
			* YPEN3 (EP071) - my version using v6 SHARE
			* No longer used as basically identical categories to RAND
			*-----------------------------------------------
			foreach wave in 1 2 4 5 {
				if `wave'==1		{
					recode r`wave'ypen3 (0=0)(0.1/max=1), gen(r`wave'beninc4_dis)
					replace r`wave'beninc4_dis = 1 if r`wave'ypen6>0 & ~missing(r`wave'ypen6) 
					replace r`wave'beninc4_dis = 2 if r`wave'ypen3_f>3  & r`wave'beninc4_dis==1
				/**/ }
				else				{
					recode r`wave'ypen36 (0=0)(0.1/max=1), gen(r`wave'beninc4_dis)
					replace r`wave'beninc4_dis = 2 if r`wave'ypen36_f>3 & r`wave'beninc4_dis==1
				/**/ }
				label var r`wave'beninc4_dis "Dis ben income last year (ypen36 version)"
			/**/ }
			label values r?beninc4_dis benimpute
			*/


*-----------------------------------------------
* EP071 - to compare to Borsch-Supan, w1 only
*-----------------------------------------------
mvdecode r?ep071* r?ep208*, mv(-2=.r\-1=.d)
label define benstat 0 "0_no income in last year" 1 "1_bens received all year" 2 "2_bens received some of year or DK", modify

// List of dis bens by country (syntax is generated in spreadsheet)
// e.g. dis_w1_cat2 = "13" means that in the country with SHARE code 13 (Sweden), cat2 is a disben in w1
local dis_w1_cat2		"13"
local dis_w1_cat3		"11, 23, 18, 12, 19, 16, 14, 15, 20"
local dis_w1_cat6		"23, 18, 17, 16, 14, 15"
local dis_w1_cat10		"17, 20"

// Seeing if have disben in each country (wave 1 only for this version)
capture drop r1beninc1_dis
recode r1ep071d1 (1=0), gen(r1beninc1_dis)		// Just to set missingness correctly
label var r1beninc1_dis "Dis ben income last year (Borsch-Supan version)"
clonevar r1benstat1_dis = r1beninc1_dis
label var r1benstat1_dis "Mths of dis ben income last year (Borsch-Supan version)"
forvalues i = 1/10 {
	// Countries in which income category `i' counts as a disability benefit (empty = none)
	local ctrylist "`dis_w1_cat`i''"
	if "`ctrylist'" != "" {
		replace r1beninc1_dis  = 1 if r1ep071d`i'==1 & inlist(country, `ctrylist')
		// benstat: 1 = received for all 12 months, 2 = any other number of months or not known
		replace r1benstat1_dis = 1 if r1ep071d`i'==1 & r1ep208_`i'==12 & inlist(country, `ctrylist')
		replace r1benstat1_dis = 2 if r1ep071d`i'==1 & r1ep208_`i'~=12 & inlist(country, `ctrylist')
	}
}
// I haven't checked out the benefits in Israel
replace r1beninc1_dis  = . if country==25
replace r1benstat1_dis = . if country==25
label values r?benstat1_dis benstat



*-----------------------------------------------
* EP071 - my preferred version from SR income, w1-6
*(full year claims only, but ignores imputations)
*-----------------------------------------------
// List of dis bens by country (syntax is generated in spreadsheet) - make sure that each number is followed by a comma!
// e.g. dis_w1_cat2 = "13" means that in the country with SHARE code 13 (Sweden), cat2 is a disben in w1
* Wave 1
local dis_w1_cat2		"13"
local dis_w1_cat3		"11, 12, 14, 15, 16, 18, 19, 20, 23"
local dis_w1_cat6		"15, 16, 17, 18, 23"
local dis_w1_cat10		"13, 20"
* Wave 2
local dis_w2_cat4		"11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 23, 25, 28, 29, 30"
local dis_w2_cat5		"11, 13, 15, 19, 23, 25, 30"
* Wave 4
local dis_w4_cat4		"11, 12, 13, 14, 15, 16, 17, 18, 20, 23, 28, 29, 32, 33, 34, 35"
local dis_w4_cat5		"11, 13, 15, 23, 29, 30, 32"
* Wave 5
local dis_w5_cat4		"11, 12, 13, 14, 15, 16, 17, 18, 20, 23, 25, 28, 31, 34, 35"
local dis_w5_cat5		"11, 15, 23, 25, 31"
* Wave 6
local dis_w6_cat5		"11, 12, 13, 15, 16, 17, 18, 20, 23, 28"		// Disability #1
local dis_w6_cat6		"11, 12, 13, 15, 16, 17, 18, 20, 23, 28"		// Disability #2
local dis_w6_cat4		"13, 18, 28"									// Sickness #1
local dis_w6_cat7		"13, 18, 28"									// Sickness #2

// Seeing if have disben in each country
foreach wave in 1 2 4 5 6 {
	// w6 uses ep671 rather than ep071 (an updated version of the same question);
	// its recode also has to handle the -2/-1 codes itself
	if `wave' == 6 {
		local stem  "ep671"
		local rules "(1=0)(-2=.r)(-1=.d)"
	}
	else {
		local stem  "ep071"
		local rules "(1=0)"
	}
	recode r`wave'`stem'd1 `rules', gen(r`wave'beninc2_dis)		// Just to set missingness correctly
	label var r`wave'beninc2_dis "Dis ben income last year (my preferred version)"
	clonevar r`wave'benstat2_dis = r`wave'beninc2_dis
	label var r`wave'benstat2_dis "Mths of dis ben income last year (my preferred version)"
	forvalues i = 1/10 {
		// Countries in which income category `i' counts as a disability benefit this wave (empty = none)
		local ctrylist "`dis_w`wave'_cat`i''"
		if "`ctrylist'" != "" {
			replace r`wave'beninc2_dis  = 1 if r`wave'`stem'd`i'==1 & inlist(country, `ctrylist')
			replace r`wave'benstat2_dis = 1 if r`wave'`stem'd`i'==1 & r`wave'ep208_`i'==12 & inlist(country, `ctrylist')
			replace r`wave'benstat2_dis = 2 if r`wave'`stem'd`i'==1 & r`wave'ep208_`i'~=12 & inlist(country, `ctrylist')
		}
	}
	// I haven't checked out the benefits in Israel
	replace r`wave'beninc2_dis  = . if country==25
	replace r`wave'benstat2_dis = . if country==25
}
// I only looked at these countries for w6 for the Boheim-Leoni collaboration
replace r6beninc2_dis  = . if ~inlist(country, 11, 12, 13, 15, 16, 17, 18, 20, 23, 28)
replace r6benstat2_dis = . if ~inlist(country, 11, 12, 13, 15, 16, 17, 18, 20, 23, 28)
/* Checks
	tab r1ifssdi if ~missing(r1beninc3_dis) & missing(r1beninc2_dis) & country~=25, m				// This shows the imputed values
	tab r1beninc3_dis r1beninc2_dis if country~=25 & ~inlist(r1ifssdi, 3, 5, 7), m					// My version basically same as RAND for non-imputed values
	tab country if r1beninc3_dis~=r1beninc2_dis & country~=25 & ~inlist(r1ifssdi, 3, 5, 7), m		// A few slight differences, for reasons that aren't clear (perhaps v6 editing compared to the v5 that RAND version comes from)
	// Outputs to spreadsheet to compare to published data
	prop r1beninc1_dis [pw=r1wtsamp], over(country)
	prop r1beninc2_dis [pw=r1wtsamp], over(country)
	prop r1beninc3_dis [pw=r1wtsamp], over(country)
	prop r1benstat1_dis [pw=r1wtsamp], over(country)
*/





*-----------------------------------------------
* EP110 - disben receipt w2-5 (not from income), syntax copied from the cleaning of EMP vars
*-----------------------------------------------
// Prep
mvdecode *ep110* *ep113* *ep114*, mv(-2=.r\-1=.d)
drop r2ep113_5_4-r2ep113_5_20 r2ep114_5_12 		// These just look really weird - only exist for one obs and just look wrong

// Year-code recodes: wave 2 codes start at 2004, waves 4/5 at 2005
local yrs_w2  `"(1=2004 "2004 or earlier")(2=2005)(3=2006)(4=2007)(5=2008)(6=2009)(7=2010)(8=2011)(9=2012)(10=2013)(11=2014)"'
local yrs_w45 `"(1=2005 "2005 or earlier")(2=2006)(3=2007)(4=2008)(5=2009)(6=2010)(7=2011)(8=2012)(9=2013)(10=2014)"'

// Setting month/year of each of the disben/sickness stints since last interview to count from start of SHARE w1 (Jan 2004)
foreach ben in 4 5 {
	// EP110 item 4 is handled under the name "sick", item 5 under "incap"
	local benname = cond(`ben'==4, "sick", "incap")

	* Getting consistent years for the different waves (currently they're numbered and labelled, and inconsistently...)
	forvalues k = 1/4 {
		capture recode r2ep114_`ben'_`k' `yrs_w2', gen(r2ep114_`ben'Y_`k')
	}
	forvalues k = 1/7 {
		capture recode r4ep114_`ben'_`k' `yrs_w45', gen(r4ep114_`ben'Y_`k')
		capture recode r5ep114_`ben'_`k' `yrs_w45', gen(r5ep114_`ben'Y_`k')
	}

	foreach wave in 2 4 5 {
		local stint "r`wave'`benname'ben"

		* Actually setting the value in months from Jan 2004
		forvalues k = 1/7 {
			gen `stint'_`k' = .
			label var `stint'_`k' "Month of end of `benname'ben period `k', counting from Jan 2004"
			capture replace `stint'_`k' = 12*(r`wave'ep114_`ben'Y_`k' - 2004) + (r`wave'ep113_`ben'_`k' - 1)
			// ==13 is 'today', which actually means still is ongoing (year is missing); 999 marks this
			capture replace `stint'_`k' = 999 if r`wave'ep113_`ben'_`k'==13
		}
		egen `stint'_latest  = rowmax(`stint'*)
		gen  `stint'_since   = r`wave'intct - `stint'_latest if `stint'_latest~=999
		// Year-only fallback where the month-level gap could not be computed
		egen `stint'_latestY = rowmax(r`wave'ep114_`ben'Y*) if missing(`stint'_since)
		gen  `stint'_sinceY  = r`wave'int_year - `stint'_latestY if ~missing(`stint'_latestY) & missing(`stint'_since) & `stint'_latest~=999

		// The final benefits dv: any claim with a recorded end is no longer a current claim
		clonevar r`wave'bennow_`benname' = r`wave'ep110d`ben'
		label var r`wave'bennow_`benname' "BBG edit of R`wave'EP110 to ignore non-current claims"
		replace r`wave'bennow_`benname' = 0 if ~missing(`stint'_since)
		replace r`wave'bennow_`benname' = 0 if ~missing(`stint'_sinceY) & missing(`stint'_since)
	}
}

// The combined dv
foreach wave in 2 4 5 {
	local p "r`wave'bennow"
	gen `p'_incap_sick = (`p'_incap==1 | `p'_sick==1)
	// Missing on either component carries over to the combined variable
	replace `p'_incap_sick = `p'_incap if missing(`p'_incap)
	replace `p'_incap_sick = `p'_sick  if missing(`p'_sick)
	* Setting early Swedish version to missing for separate incap/sick - see note in OneNote/spreadsheet (basically huge change when wording improved w5, earlier results implausible)
	if `wave' != 5 {
		replace `p'_incap = .o if country==13
		replace `p'_sick  = .o if country==13
	}
}
/* CHECKING - the first two lines check the months setting, the last two check that the dv is only observed for longitudinal interviews
	browse r2ep114_4_1 r2ep113_4_1 r2ep114_4_2 r2ep113_4_2 r2int_year r2int_month r2intct r2sickben* if ~missing(r2ep114_4_1)
	browse r2ep114_5_1 r2ep113_5_1 r2ep114_5_2 r2ep113_5_2 r2int_year r2int_month r2intct r2incapben* if ~missing(r2ep114_5_1)
	bysort r5mn101_: tab r5ep110d4 r5bennow_sick, cell m
	bysort r5mn101_: tab r5ep110d5 r5bennow_incap, cell m
	*/

	
*_____________________________________________________________________________________________________________________________________________________
*
**# RESHAPING TO LONG (NOT WIDE) FORMAT
*_____________________________________________________________________________________________________________________________________________________

order mergeid, first
gen survey = "SHARE"

// Reshaping
// widevars collects the wave-varying variables as wildcard patterns (* or ? stands for the wave number)
* Mobility skills
local widevars 		"r*walk100a r*sita r*chaira r*climsa r*clim1a r*stoopa r*armsa r*pusha r*lifta r*dimea"
* ADLs/IADL
local widevars 		"`widevars' r*dressa r*walkra r*batha r*eata r*beda r*toilta r*mapa r*mealsa r*shopa r*phonea r*medsa r*moneya"
local widevars 		"`widevars' r*shlt r*shltf r*imrc r*dlrc r?height r?weight r?bmi r?maxgrip r?gs_effort r?gs_no_safe r?gs_no_inj"
* CESD - w2 only
local widevars 		"`widevars' r*depres r?effort r*sleepr r*whappy r*flone r*fsad r*going r*enlife r*cesd r*cesdm"
* Euro-D
local widevars 		"`widevars' r?depress r?pessim r?suicid r?guilt r?sleep r?intrst r?irritb r?appett r?fatig r?concnt r?enjoym r?tearfl r?eurod r?feurod"
* Symptoms list for past 6mths
local widevars 		"`widevars' r?ph010d1 r?ph010d2 r?ph010d3 r?ph010d4 r?ph010d5 r?ph010d6 r?ph010d7 r?ph010d8 r?ph010d9 r?ph010d10 r?ph010d11"
* Added for Boheim-Leoni collaboration
local widevars 		"`widevars' r*hearte r*hibpe r*stroke r*diabe r*lunge r*arthre r*cancre r*hosp1y r*homcar1y r*doctim1y r*nrshom1y"
* Conditions added for OECD work
local widevars 		"`widevars' r*psyche r*asthmae r*hchole r*parkine r*catrcte r*hipfeme"
* Added for Boheim-Leoni collaboration (these are back pain vars)
local widevars 		"`widevars' r*ph084_ r*ph087d1 r*ph087d2 r*ph087d3 r*ph087d4 r*ph087d5 r*ph087d6 r*ph087d7"
local widevars 		"`widevars' r*lbrf_s r*work r*worknew r*jhours r*jhour2 r*jweeks_s r*jweek2_s r*iearn r*ifearn r*itearn r*itfearn r?isemp r?itsemp"
* Added for Boheim-Leoni collaboration
local widevars 		"`widevars' r*jlasty r*jcten"
* Added for 2023 OECD working paper
local widevars 		"`widevars' r*ph041_ r*ph043_ r*ph044_ r*ph045_ r*ph046_"
local widevars 		"`widevars' r*issdi r*ifssdi r*itssdi r*itfssdi r?isret r?itsret r?igxfr r?itgxfr r?ipena r?itpena h?itot h?ittot"
* Administrative vars
local widevars 		"`widevars' inw? r*wtresp r*proxy r*agey r*agem h?child r?mstat r*iwy"
* SHARE-specific vars not from Global Aging Data
local widevars 		"`widevars' r*ph054_ r*ph004_ r*ph005_ r*ph061_ r?mn101_ r?mn024_ r?verbf r*ep210_"
* SHARE-specific ben receipt vars
local widevars 		"`widevars' r?bennow_sick r?bennow_incap_sick r?bennow_incap r?beninc1_dis r?beninc2_dis r?beninc3_dis r?benstat1_dis r?benstat2_dis"
* Variables that do not vary by wave
local fixedvars		"ragender rabyear rabmonth raedyrs raiscednew survey country "

keep mergeid cid `widevars' `fixedvars'
// Turn the wildcard patterns into reshape stubs: @ marks where the wave number sits in each name
local stubs = subinstr(subinstr("`widevars'", "*", "@", .), "?", "@", .)
reshape long `stubs', i(mergeid) j(wave)
label drop agecl		// For some reason this is applied to WAVE, which only serves to confuse


*_____________________________________________________________________________________________________________________________________________________
*
**# CLEANING SHARE-specific VARS
*_____________________________________________________________________________________________________________________________________________________

// Survey things
label variable rmn101_	"Baseline vs. longitudinal interview"
label variable rmn024_	"Nursing home interview"
label variable rworknew	"Recent work status (RWORK HRS/ELSA + BBG recode of SHARE)"
// The two module-level proxy indicators get descriptive names
rename (rph054_ rep210_) (rproxy_ph rproxy_ep)
label variable rproxy_ph "Proxy status for SHARE ph module (phys hlth)"
label variable rproxy_ep "Proxy status for SHARE ep module (employment)"


// DI vars
label variable rbeninc1_dis 	"Dis ben income last year (Borsch-Supan version, w1 only)"
label variable rbeninc2_dis 	"Dis ben income for ALL of last year (my preferred version)"
label variable rbeninc3_dis 	"Dis ben income last year (RAND version inc imputations)"
label variable rbenstat1_dis 	"Mths of dis ben income last year (Borsch-Supan version)"
label variable rbenstat2_dis 	"Mths of dis ben income last year (my preferred version)"


// Symptoms lists
mvdecode rph010d*, mv(-2=.r\-1=.d)
rename rph010d*	rsymptom*
// Label text for each of the 11 symptoms; every label gets the same "For 6mths: " prefix
local sym1		"Pain in your back, knees, hips or any other joint"
local sym2		"Heart trouble or angina, chest pain during exercise"
local sym3		"Breathlessness, difficulty breathing"
local sym4		"Persistent cough"
local sym5		"Swollen legs"
local sym6		"Sleeping problems"
local sym7		"Falling down"
local sym8		"Fear of falling down"
local sym9		"Dizziness, faints or blackouts"
local sym10		"Stomach or intestine problems"
local sym11		"Incontinence or involuntary loss of urine"
forvalues s = 1/11 {
	label var rsymptom`s' "For 6mths: `sym`s''"
}


// Seeing and hearing
// ph041_/ph043_/ph044_/ph046_ are kept under new names; ph045_ (use of hearing aid) is dropped
rename (rph041_ rph043_ rph044_ rph046_) (rsee_screen_SHARE rhefrnd rhepap rhehear)
label var rsee_screen_SHARE "Vision: whether usually wears glasses (SHARE)"
drop rph045_


// Single-item health vars
numlabel ph005_ yesno, mask(#_) add
label var rshltf "Position of SRH in SHARE/ELSA"
ren rph004_		rlsi
	label var rlsi	"Long-term ill/disabled"
	recode rlsi (5=0)
ren rph005_		rllsiA
	label var rllsiA	"Limited activities (SHARE)"
// The 0/1 version is generated BEFORE the mvdecode below and is not in its varlist, so the
// refusal / don't know codes have to be turned into extended missings here; otherwise
// -recode- copies -2/-1 through to rllsiA_B unchanged
recode rllsiA (1 2=1 "1_limited")(3=0 "0_not limited")(-2=.r)(-1=.d), gen(rllsiA_B)
	label var rllsiA_B "Limited activities, recoded to 0/1 (SHARE)"
ren rph061_		rwld
	label var rwld		"Health prob limits paid work"
	recode rwld (5=0)
// Refusal (-2) and don't know (-1) become .r / .d on the renamed source variables
mvdecode rlsi rllsiA rwld, mv(-2=.r\-1=.d)

	
*_____________________________________________________________________________________________________________________________________________________
*
**# FINAL THINGS
*_____________________________________________________________________________________________________________________________________________________

// Tidying up
order _all, sequential
order wave country, after(mergeid)
// version is set before -table- because contents() is the older -table- syntax
version 14
table wave, contents(count rwalk100a mean rwalk100a)
drop if wave == 3

// Saving
compress
save "${sharedir}\SHARE_BB_${versno}.dta", replace

